/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * RELAGGS.java
 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 */

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.SimpleBatchFilter;

import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;

/**
 * <!-- globalinfo-start --> A propositionalization filter inspired by the
 * RELAGGS algorithm.<br/>
 * It processes all relational attributes that fall into the user defined range
 * (all others are skipped, i.e., not added to the output). Currently, the
 * filter only processes one level of nesting.<br/>
 * The class attribute is not touched.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * M.-A. Krogel, S. Wrobel: Facets of Aggregation Approaches to
 * Propositionalization. In: Work-in-Progress Track at the Thirteenth
 * International Conference on Inductive Logic Programming (ILP), 2003.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;inproceedings{Krogel2003,
 *    author = {M.-A. Krogel and S. Wrobel},
 *    booktitle = {Work-in-Progress Track at the Thirteenth International Conference on Inductive Logic Programming (ILP)},
 *    editor = {T. Horvath and A. Yamamoto},
 *    title = {Facets of Aggregation Approaches to Propositionalization},
 *    year = {2003},
 *    PDF = {http://kd.cs.uni-magdeburg.de/\~krogel/papers/aggs.pdf}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -D
 *  Turns on output of debugging information.
 * </pre>
 * 
 * <pre>
 * -R &lt;index1,index2-index4,...&gt;
 *  Specify list of relational attributes to process.
 *  (default: select all relational attributes)
 * </pre>
 * 
 * <pre>
 * -V
 *  Inverts the matching sense of the selection.
 * </pre>
 * 
 * <pre>
 * -C &lt;num&gt;
 *  Max. cardinality of nominal attributes. If a nominal attribute
 *  has more values than this upper limit, then it will be skipped.
 *  (default: 20)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 5547 $
 */
public class RELAGGS extends SimpleBatchFilter implements
		TechnicalInformationHandler {

	/** for serialization */
	private static final long serialVersionUID = -3333791375278589231L;

	/**
	 * number of output attributes generated per numeric nested attribute
	 * (MIN, MAX, AVG, STDEV, SUM)
	 */
	private static final int NUM_NUMERIC_AGGREGATES = 5;

	/** the max. cardinality for nominal attributes */
	protected int m_MaxCardinality = 20;

	/**
	 * the range of attributes to process (only relational ones will be
	 * processed)
	 */
	protected Range m_SelectedRange = new Range("first-last");

	/**
	 * stores the attribute statistics computed by the last call to
	 * {@link #process(Instances)}:
	 * <code>inst_index-att_index-att_index_in_rel_att &lt;-&gt; AttributeStats</code>
	 */
	protected Hashtable<String, AttributeStats> m_AttStats = new Hashtable<String, AttributeStats>();

	/**
	 * Returns a string describing this filter
	 * 
	 * @return a description of the filter suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String globalInfo() {
		return "A propositionalization filter inspired by the RELAGGS algorithm.\n"
				+ "It processes all relational attributes that fall into the user defined "
				+ "range (all others are skipped, i.e., not added to the output). "
				+ "Currently, the filter only processes one level of nesting.\n"
				+ "The class attribute is not touched.\n"
				+ "\n"
				+ "For more information see:\n\n"
				+ getTechnicalInformation().toString();
	}

	/**
	 * Returns an instance of a TechnicalInformation object, containing detailed
	 * information about the technical background of this class, e.g., paper
	 * reference or book this class is based on.
	 * 
	 * @return the technical information about this class
	 */
	public TechnicalInformation getTechnicalInformation() {
		TechnicalInformation result = new TechnicalInformation(
				Type.INPROCEEDINGS);

		result.setValue(Field.AUTHOR, "M.-A. Krogel and S. Wrobel");
		result.setValue(Field.TITLE,
				"Facets of Aggregation Approaches to Propositionalization");
		result.setValue(
				Field.BOOKTITLE,
				"Work-in-Progress Track at the Thirteenth International Conference on Inductive Logic Programming (ILP)");
		result.setValue(Field.EDITOR, "T. Horvath and A. Yamamoto");
		result.setValue(Field.YEAR, "2003");
		result.setValue(Field.PDF,
				"http://kd.cs.uni-magdeburg.de/~krogel/papers/aggs.pdf");

		return result;
	}

	/**
	 * Returns an enumeration describing the available options: the options
	 * of the superclass followed by -R, -V and -C.
	 * 
	 * @return an enumeration of all the available options.
	 */
	public Enumeration listOptions() {
		// Object-typed because the inherited enumeration is a raw type
		Vector<Object> result = new Vector<Object>();

		Enumeration en = super.listOptions();
		while (en.hasMoreElements()) {
			result.addElement(en.nextElement());
		}

		result.addElement(new Option(
				"\tSpecify list of relational attributes to process.\n"
						+ "\t(default: select all relational attributes)", "R",
				1, "-R <index1,index2-index4,...>"));

		result.addElement(new Option(
				"\tInverts the matching sense of the selection.", "V", 0, "-V"));

		result.addElement(new Option(
				"\tMax. cardinality of nominal attributes. If a nominal attribute\n"
						+ "\thas more values than this upper limit, then it will be skipped.\n"
						+ "\t(default: 20)", "C", 1, "-C <num>"));

		return result.elements();
	}

	/**
	 * Parses the options for this object.
	 * <p/>
	 * 
	 * <!-- options-start --> Valid options are:
	 * <p/>
	 * 
	 * <pre>
	 * -D
	 *  Turns on output of debugging information.
	 * </pre>
	 * 
	 * <pre>
	 * -R &lt;index1,index2-index4,...&gt;
	 *  Specify list of relational attributes to process.
	 *  (default: select all relational attributes)
	 * </pre>
	 * 
	 * <pre>
	 * -V
	 *  Inverts the matching sense of the selection.
	 * </pre>
	 * 
	 * <pre>
	 * -C &lt;num&gt;
	 *  Max. cardinality of nominal attributes. If a nominal attribute
	 *  has more values than this upper limit, then it will be skipped.
	 *  (default: 20)
	 * </pre>
	 * 
	 * <!-- options-end -->
	 * 
	 * @param options
	 *            the options to use
	 * @throws Exception
	 *             if setting of options fails
	 */
	public void setOptions(String[] options) throws Exception {
		String tmpStr;

		tmpStr = Utils.getOption('R', options);
		if (tmpStr.length() != 0) {
			setSelectedRange(tmpStr);
		} else {
			setSelectedRange("first-last");
		}

		// always set explicitly, so a missing -V resets any earlier inversion
		setInvertSelection(Utils.getFlag('V', options));

		tmpStr = Utils.getOption('C', options);
		if (tmpStr.length() != 0) {
			setMaxCardinality(Integer.parseInt(tmpStr));
		} else {
			setMaxCardinality(20);
		}

		super.setOptions(options);
	}

	/**
	 * Gets the current settings of the filter.
	 * 
	 * @return an array of strings suitable for passing to setOptions
	 */
	public String[] getOptions() {
		Vector<String> result = new Vector<String>();

		for (String option : super.getOptions()) {
			result.add(option);
		}

		result.add("-R");
		result.add(getSelectedRange().getRanges());

		if (getInvertSelection()) {
			result.add("-V");
		}

		result.add("-C");
		result.add(Integer.toString(getMaxCardinality()));

		return result.toArray(new String[result.size()]);
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String maxCardinalityTipText() {
		return "The maximum number of values a nominal attribute can have before it's skipped.";
	}

	/**
	 * Sets the maximum number of values allowed for nominal attributes, before
	 * they're skipped.
	 * 
	 * @param value
	 *            the maximum value.
	 */
	public void setMaxCardinality(int value) {
		m_MaxCardinality = value;
	}

	/**
	 * Gets the maximum number of values allowed for nominal attributes, before
	 * they're skipped.
	 * 
	 * @return the maximum number.
	 */
	public int getMaxCardinality() {
		return m_MaxCardinality;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String attributeIndicesTipText() {
		return "Specify range of attributes to act on; "
				+ "this is a comma separated list of attribute indices, with "
				+ "\"first\" and \"last\" valid values; Specify an inclusive "
				+ "range with \"-\"; eg: \"first-3,5,6-10,last\".";
	}

	/**
	 * Set the range of attributes to process. The current invert setting
	 * (see {@link #setInvertSelection(boolean)}) is carried over to the new
	 * range, so the two setters can be called in either order.
	 * 
	 * @param value
	 *            the new range.
	 */
	public void setSelectedRange(String value) {
		// the invert flag lives inside the Range object; without carrying it
		// over, replacing the range would silently reset the inversion
		boolean invert = m_SelectedRange.getInvert();

		m_SelectedRange = new Range(value);
		m_SelectedRange.setInvert(invert);
	}

	/**
	 * Gets the current range selection.
	 * 
	 * @return current selection.
	 */
	public Range getSelectedRange() {
		return m_SelectedRange;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String invertSelectionTipText() {
		return "Set attribute selection mode. If false, only selected "
				+ "attributes in the range will be worked on; if "
				+ "true, only non-selected attributes will be processed.";
	}

	/**
	 * Sets whether selected columns should be processed or skipped.
	 * 
	 * @param value
	 *            the new invert setting
	 */
	public void setInvertSelection(boolean value) {
		m_SelectedRange.setInvert(value);
	}

	/**
	 * Gets whether the supplied columns are to be processed or skipped
	 * 
	 * @return true if the selection is inverted, i.e., the attributes
	 *         outside the supplied range are the ones being processed
	 */
	public boolean getInvertSelection() {
		return m_SelectedRange.getInvert();
	}

	/**
	 * Returns the Capabilities of this filter.
	 * 
	 * @return the capabilities of this object
	 * @see Capabilities
	 */
	public Capabilities getCapabilities() {
		Capabilities result = super.getCapabilities();
		result.disableAll();

		// attributes
		result.enable(Capability.NOMINAL_ATTRIBUTES);
		result.enable(Capability.NUMERIC_ATTRIBUTES);
		result.enable(Capability.DATE_ATTRIBUTES);
		result.enable(Capability.RELATIONAL_ATTRIBUTES);
		result.enable(Capability.MISSING_VALUES);

		// class
		result.enable(Capability.NOMINAL_CLASS);
		result.enable(Capability.NUMERIC_CLASS);
		result.enable(Capability.DATE_CLASS);
		result.enable(Capability.MISSING_CLASS_VALUES);
		result.enable(Capability.NO_CLASS);

		return result;
	}

	/**
	 * Determines the output format based on the input format and returns this.
	 * In case the output format cannot be returned immediately, i.e.,
	 * immediateOutputFormat() returns false, then this method will be called
	 * from batchFinished().
	 * <p/>
	 * The class attribute and all non-relational attributes are copied.
	 * Relational attributes outside the selected range are dropped. For each
	 * selected relational attribute, every numeric nested attribute yields
	 * the five attributes <code>_MIN, _MAX, _AVG, _STDEV, _SUM</code> and
	 * every nominal nested attribute with at most the max. cardinality
	 * yields one <code>_CNT</code> attribute per label; all other nested
	 * attributes are dropped.
	 * 
	 * @param inputFormat
	 *            the input format to base the output format on
	 * @return the output format
	 * @throws Exception
	 *             in case the determination goes wrong
	 * @see #hasImmediateOutputFormat()
	 * @see #batchFinished()
	 */
	protected Instances determineOutputFormat(Instances inputFormat)
			throws Exception {

		Instances result;
		Instances relFormat;
		FastVector atts;
		Attribute outer;
		Attribute att;
		String prefix;
		int clsIndex;
		int i;
		int n;
		int m;

		// the range can only be evaluated once the upper bound is known
		m_SelectedRange.setUpper(inputFormat.numAttributes() - 1);

		atts = new FastVector();
		clsIndex = -1;
		for (i = 0; i < inputFormat.numAttributes(); i++) {
			outer = inputFormat.attribute(i);

			// we don't process the class
			if (i == inputFormat.classIndex()) {
				clsIndex = atts.size();
				atts.addElement(outer.copy());
				continue;
			}

			if (!outer.isRelationValued()) {
				atts.addElement(outer.copy());
				continue;
			}

			if (!m_SelectedRange.isInRange(i)) {
				if (getDebug()) {
					System.out.println("Attribute " + (i + 1) + " ("
							+ outer.name() + ") skipped.");
				}
				continue;
			}

			// process relational attribute
			prefix = outer.name() + "_";
			relFormat = outer.relation();
			for (n = 0; n < relFormat.numAttributes(); n++) {
				att = relFormat.attribute(n);

				if (att.isNumeric()) {
					atts.addElement(new Attribute(prefix + att.name() + "_MIN"));
					atts.addElement(new Attribute(prefix + att.name() + "_MAX"));
					atts.addElement(new Attribute(prefix + att.name() + "_AVG"));
					atts.addElement(new Attribute(prefix + att.name()
							+ "_STDEV"));
					atts.addElement(new Attribute(prefix + att.name() + "_SUM"));
				} else if (att.isNominal()) {
					if (att.numValues() <= m_MaxCardinality) {
						for (m = 0; m < att.numValues(); m++) {
							atts.addElement(new Attribute(prefix + att.name()
									+ "_" + att.value(m) + "_CNT"));
						}
					} else if (getDebug()) {
						System.out.println("Attribute " + (i + 1) + "/"
								+ (n + 1) + " (" + outer.name() + "/"
								+ att.name() + ") skipped, "
								+ att.numValues() + " > "
								+ m_MaxCardinality + ".");
					}
				} else if (getDebug()) {
					System.out.println("Attribute " + (i + 1) + "/"
							+ (n + 1) + " (" + outer.name() + "/"
							+ att.name() + ") skipped.");
				}
			}
		}

		// generate new format
		result = new Instances(inputFormat.relationName(), atts, 0);
		result.setClassIndex(clsIndex);

		// neither string nor relational attributes need to be copied to the
		// output:
		initOutputLocators(result, new int[0]);

		return result;
	}

	/**
	 * Processes the given data (may change the provided dataset) and returns
	 * the modified version. This method is called in batchFinished().
	 * <p/>
	 * The columns are written in the same order in which
	 * {@link #determineOutputFormat(Instances)} generated them. The class
	 * attribute and non-relational attributes are copied as they are. Each
	 * selected relational value is replaced by the statistics of its nested
	 * attributes; the statistics are also stored in {@link #m_AttStats}. If
	 * the relational value itself is missing, all attributes derived from it
	 * are set to missing.
	 * 
	 * @param instances
	 *            the data to process
	 * @return the modified data
	 * @throws Exception
	 *             in case the processing goes wrong
	 * @see #batchFinished()
	 */
	protected Instances process(Instances instances) throws Exception {
		Instances result;
		Instances relInstances;
		Instance inst;
		Instance newInst;
		Attribute outer;
		Attribute att;
		AttributeStats stats;
		int classIndex;
		int k;
		int l;
		int i;
		int n;
		int m;

		result = getOutputFormat();
		classIndex = instances.classIndex();

		// initialize attribute statistics
		m_AttStats.clear();

		for (k = 0; k < instances.numInstances(); k++) {
			inst = instances.instance(k);
			newInst = new Instance(result.numAttributes());
			newInst.setWeight(inst.weight());

			// l is the index of the next output column to fill
			l = 0;
			for (i = 0; i < instances.numAttributes(); i++) {
				outer = instances.attribute(i);

				// the class and all non-relational attributes were copied to
				// the output format, hence their values are copied as well
				if (i == classIndex || !outer.isRelationValued()) {
					newInst.setValue(l, inst.value(i));
					l++;
					continue;
				}

				// relational attributes outside the range have no output
				// columns
				if (!m_SelectedRange.isInRange(i)) {
					continue;
				}

				// there is no bag to aggregate, but the output columns
				// derived from this attribute still have to be accounted for
				if (inst.isMissing(i)) {
					l = markMissing(newInst, l, outer.relation());
					continue;
				}

				// replace relational data with statistics
				relInstances = inst.relationalValue(i);
				for (n = 0; n < relInstances.numAttributes(); n++) {
					att = relInstances.attribute(n);
					if (!isAggregated(att)) {
						continue;
					}

					stats = relInstances.attributeStats(n);
					m_AttStats.put(k + "-" + i + "-" + n, stats);

					if (att.isNumeric()) {
						newInst.setValue(l, stats.numericStats.min);
						l++;
						newInst.setValue(l, stats.numericStats.max);
						l++;
						newInst.setValue(l, stats.numericStats.mean);
						l++;
						newInst.setValue(l, stats.numericStats.stdDev);
						l++;
						newInst.setValue(l, stats.numericStats.sum);
						l++;
					} else {
						for (m = 0; m < att.numValues(); m++) {
							newInst.setValue(l, stats.nominalCounts[m]);
							l++;
						}
					}
				}
			}

			result.add(newInst);
		}

		return result;
	}

	/**
	 * Checks whether statistics are generated for the given nested attribute,
	 * i.e., whether it is numeric or a nominal attribute that does not exceed
	 * the max. cardinality.
	 * 
	 * @param att
	 *            the attribute of the relational format to check
	 * @return true if the attribute contributes attributes to the output
	 */
	private boolean isAggregated(Attribute att) {
		return att.isNumeric()
				|| (att.isNominal() && att.numValues() <= m_MaxCardinality);
	}

	/**
	 * Sets all the output values that are derived from one relational
	 * attribute to missing.
	 * 
	 * @param target
	 *            the output instance to update
	 * @param start
	 *            the index of the first output value derived from the
	 *            relational attribute
	 * @param relFormat
	 *            the header of the relational attribute
	 * @return the index of the first output value after the ones derived
	 *         from the relational attribute
	 */
	private int markMissing(Instance target, int start, Instances relFormat) {
		Attribute att;
		int pos;
		int width;
		int n;
		int w;

		pos = start;
		for (n = 0; n < relFormat.numAttributes(); n++) {
			att = relFormat.attribute(n);

			// number of output attributes generated for this nested attribute
			if (att.isNumeric()) {
				width = NUM_NUMERIC_AGGREGATES;
			} else if (isAggregated(att)) {
				width = att.numValues();
			} else {
				width = 0;
			}

			for (w = 0; w < width; w++) {
				target.setMissing(pos);
				pos++;
			}
		}

		return pos;
	}

	/**
	 * Returns the revision string.
	 * 
	 * @return the revision
	 */
	public String getRevision() {
		return RevisionUtils.extract("$Revision: 5547 $");
	}

	/**
	 * runs the filter with the given arguments
	 * 
	 * @param args
	 *            the commandline arguments
	 */
	public static void main(String[] args) {
		runFilter(new RELAGGS(), args);
	}
}
